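# Count the number of wells per county, year, and month, keyed by the spud date (field 2);
# the completion-date (or first-production-date) variant using field 6 is left commented out below.
# Also sums field 9 per group and reports its average per well.
# Intended for data covering 2001-2011; note that the script itself applies no year filter.
# Input: processed original data file (comma-separated, with one header line per file)
# Output: county,year,month,wellCount,totalDays,averageDays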

# Set up the separators before any record is read.
BEGIN {
  # Output fields are joined with a bare comma.
  OFS = ","
  # Input fields are split on a comma together with any spaces or tabs
  # on either side of it, so padding next to a comma is not kept in a field.
  FS = "[ \t]*,[ \t]*"
}

# Per-record rule: tally one well, and its field-9 value, under the
# (county, year, month) group taken from the record's date.
#   $1 - county
#   $2 - date written as month/day/year (the spud date, per the comments below)
#   $9 - numeric value summed into totalDays
#        (presumably a day count -- confirm against the input data)
{
  # The first line of every input file is treated as a header and skipped.
  if (FNR == 1)
    next
  # Skip blank or whitespace-only lines (e.g. a trailing empty line); otherwise
  # they would be tallied as a well with an empty county, year and month.
  if ($0 ~ /^[ \t\r]*$/)
    next

  county = $1
  # count based on completion date
  # split ($6, date, "/")
  # count based on spud date
  split ($2, date, "/")
  year = date[3]
  month = date[1]
  days = $9

  wellCounts[county,year,month]++
  totalDays[county,year,month] += days

  # if ($9 ~ /G/)
  #   gas[county,year,month]++

  # else if ($9 ~ /O/)
  #   oil[county,year,month]++
  # else
  #   others[county,year,month]++
}

# Final report: one comma-separated line per (county, year, month) group with
# the well count, the summed field-9 value, and their ratio (average per well).
# Groups are emitted in awk's unspecified for-in order, so sort the output
# afterwards if a stable order is needed.
END {
  for (key in wellCounts) {
    n = wellCounts[key]
    sum = totalDays[key]
    # Every key in wellCounts was incremented at least once, so n >= 1.
    aveDays[key] = sum / n
    # Undo the SUBSEP-joined composite subscript to recover the three parts.
    split(key, parts, SUBSEP)
    print parts[1], parts[2], parts[3], n, sum, aveDays[key]
  }
}

